{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "binding-delta",
   "metadata": {
    "papermill": {
     "duration": 0.016304,
     "end_time": "2021-03-22T20:29:23.476444",
     "exception": false,
     "start_time": "2021-03-22T20:29:23.460140",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# anomaly_score_unsupervised"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb97e294-9399-4d96-a95c-8ad7e29a2872",
   "metadata": {},
   "source": [
    "Send arbitrary time-series data to the component and train an unsupervised LSTM-Autoencoder model. The moment unseen patters occur the anomaly score rises.\n",
    "\n",
    "Future work:\n",
    "\n",
    "- reset / rollback model (for regular flushing or after a real anomaly (true positive) occurred)\n",
    "- add check-pointing for service persistence and rollback"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e63f153-5524-4c3a-8b55-2afb01f2750b",
   "metadata": {
    "papermill": {
     "duration": 0.164002,
     "end_time": "2021-03-22T20:29:25.951504",
     "exception": false,
     "start_time": "2021-03-22T20:29:25.787502",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from numpy import concatenate\n",
    "from matplotlib import pyplot\n",
    "from pandas import read_csv\n",
    "from pandas import DataFrame\n",
    "from pandas import concat\n",
    "import sklearn\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Dense\n",
    "from tensorflow.keras.layers import LSTM\n",
    "from tensorflow.keras.callbacks import Callback\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import LSTM, Dense, Activation\n",
    "\n",
    "from flask import request\n",
    "from flask import Flask\n",
    "from flask import send_file\n",
    "\n",
    "import time\n",
    "import json\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "import pickle\n",
    "import re\n",
    "import logging\n",
    "import os\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abstract-cambridge",
   "metadata": {
    "papermill": {
     "duration": 0.012801,
     "end_time": "2021-03-22T20:29:25.972462",
     "exception": false,
     "start_time": "2021-03-22T20:29:25.959661",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Number of LSTM layers, default: 3\n",
    "lstm_layers = int(os.environ.get('lstm_layers', '3'))\n",
    "\n",
    "# Number of LSTM layers, default: 10\n",
    "lstm_cells_per_layer = int(os.environ.get('lstm_cells_per_layer', '10'))\n",
    "\n",
    "# LSTM time steps, default: 10\n",
    "timesteps = int(os.environ.get('timesteps', '10'))\n",
    "\n",
    "# Time series dimensionality, default: 3\n",
    "dim = int(os.environ.get('dim', '3'))\n",
    "\n",
    "# batch size, default: 32\n",
    "batch_size = int(os.environ.get('batch_size', '32'))\n",
    "\n",
    "# epochs, default: 3\n",
    "epochs = int(os.environ.get('epochs', '3'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55f9b39f-2c8c-4ab5-b4f5-513357bf20ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "for element in sys.argv:\n",
    "    logging.warning('argv raw ' +  element)\n",
    "\n",
    "parameters = list(\n",
    "    map(lambda s: re.sub('$', '\"', s),\n",
    "        map(\n",
    "            lambda s: s.replace('=', '=\"'),\n",
    "            filter(\n",
    "                lambda s: s.find('=') > -1 and bool(re.match(r'[A-Za-z0-9_]*=[.\\/A-Za-z0-9]*', s)),\n",
    "                sys.argv\n",
    "            )\n",
    "    )))\n",
    "\n",
    "lstm_layers = int(lstm_layers)\n",
    "lstm_cells_per_layer = int(lstm_cells_per_layer)\n",
    "timesteps = int(timesteps)\n",
    "dim = int(dim)\n",
    "batch_size = int(batch_size)\n",
    "\n",
    "\n",
    "for parameter in parameters:\n",
    "    exec(parameter)\n",
    "    logging.warning('Parameter: ' + parameter)\n",
    "\n",
    "\n",
    "for parameter in parameters:\n",
    "    exec(\"logging.warning('final parameter: ' + str({}))\".format(parameter.split('=')[0]))\n",
    "    exec(\"logging.warning('final parameter type: ' + str(type({})))\".format(parameter.split('=')[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77a93042-b9fc-417c-9eea-7b2715219616",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "with open('./anomaly-score-unsupervised/watsoniotp.healthy.phase_aligned.pickle','rb') as file_object:\n",
    "    raw_data = file_object.read()\n",
    "    data_healthy = pickle.loads(raw_data, encoding='latin1')\n",
    "\n",
    "with open('./anomaly-score-unsupervised/watsoniotp.broken.phase_aligned.pickle','rb') as file_object:\n",
    "    raw_data = file_object.read()\n",
    "    data_broken = pickle.loads(raw_data, encoding='latin1')\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42ad728c-70aa-41b6-9885-40d3b32708f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "data_healthy = data_healthy.reshape(3000,3)\n",
    "data_broken = data_broken.reshape(3000,3)\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d85be76-c42f-4967-9525-5c273ca4efa6",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "def scaleData(data):\n",
    "    # normalize features\n",
    "    scaler = MinMaxScaler(feature_range=(0, 1))\n",
    "    return scaler.fit_transform(data)\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0315b9fc-14bb-4e14-ac77-08695ef12059",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "data_healthy_scaled = scaleData(data_healthy)\n",
    "data_broken_scaled = scaleData(data_broken)\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0f4985f-715b-4ee2-838f-c76056ecc520",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "data = np.array([[0,1,2,3,4,5,6,7,8,9],[10,11,12,13,14,15,16,17,18,19], [20,21,22,23,24,25,26,27,28,29]])\n",
    "data = data.T\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0dbf8ef-c486-4c66-8e82-0ccb5b26609f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def lstm_data_transform(data, num_steps=5):\n",
    "    x = []\n",
    "    for i in range(data.shape[0]):\n",
    "        # compute a new (sliding window) index\n",
    "        end_ix = i + num_steps        # if index is larger than the size of the dataset, we stop\n",
    "        if end_ix >= data.shape[0]:\n",
    "            break        # Get a sequence of data for x\n",
    "        seq = data[i:end_ix]\n",
    "        x.append(seq)\n",
    "    return np.array(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57b3c3ad-435c-47af-84a1-7f5949aa9f21",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\"\"\"\n",
    "samples = 3000\n",
    "data_healthy_scaled_reshaped = lstm_data_transform(data_healthy_scaled, num_steps=timesteps)\n",
    "data_broken_scaled_reshaped = lstm_data_transform(data_broken_scaled, num_steps=timesteps)\n",
    "\n",
    "#reshape to (300,10,3)\n",
    "data_healthy_scaled_reshaped.shape\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75983ac5-9abd-4d01-b864-8015b18e5e83",
   "metadata": {},
   "outputs": [],
   "source": [
    "loss_history = []\n",
    "loss_history_total = []\n",
    "\n",
    "\n",
    "class LossHistory(Callback):\n",
    "    def on_train_begin(self, logs):\n",
    "        loss_history = [] \n",
    "\n",
    "    def on_train_batch_end(self, batch, logs):\n",
    "        print('Loss on_train_batch_end '+str(logs.get('loss')))\n",
    "        loss_history.append(logs.get('loss'))\n",
    "        loss_history_total.append(logs.get('loss'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7da08143-9d23-4992-8069-c69b45914142",
   "metadata": {},
   "outputs": [],
   "source": [
    "# design network\n",
    "\n",
    "def create_model():\n",
    "    model = Sequential()\n",
    "    for _ in range(lstm_layers):\n",
    "        model.add(LSTM(lstm_cells_per_layer,input_shape=(timesteps,dim),return_sequences=True))\n",
    "    model.add(Dense(dim))\n",
    "    model.compile(loss='mae', optimizer='adam')\n",
    "    return model\n",
    "\n",
    "model = create_model()\n",
    "\n",
    "def train(data):\n",
    "    model.fit(data, data, epochs=epochs, batch_size=batch_size, validation_data=(data, data), verbose=0, shuffle=False,callbacks=[LossHistory()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ec81c3b-a41f-46ec-8881-a2aa774da90c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def doNN(data):\n",
    "    #data = scaleData(data)\n",
    "    train(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0cbf5044-042f-4212-8d9e-973eea90480c",
   "metadata": {},
   "outputs": [],
   "source": [
    "app = Flask(__name__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a56724c-033f-45ac-9765-9265c559b078",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "@app.route('/send_data', methods=['POST'])\n",
    "def send_data():\n",
    "    message = request.get_json()\n",
    "    #message = message[1:-1] # get rid of encapsulating quotes\n",
    "    #json_array = json.loads()\n",
    "    data = np.asarray(message)\n",
    "    print(data)\n",
    "    data = lstm_data_transform(data, num_steps=timesteps)\n",
    "    doNN(data)\n",
    "    return json.dumps(loss_history)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "861e3431-3268-4b68-a936-d9808dc13345",
   "metadata": {},
   "outputs": [],
   "source": [
    "@app.route('/reset_model', methods=['GET'])\n",
    "def reset_model():\n",
    "    loss_history = []\n",
    "    loss_history_total = []\n",
    "    model = create_model()\n",
    "    return \"done\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d295a9a2-cb30-407e-9351-d70727506856",
   "metadata": {},
   "outputs": [],
   "source": [
    "@app.route('/get_loss_as_json', methods=['GET'])\n",
    "def get_loss_as_json():\n",
    "    return json.dumps(loss_history_total)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2cbee22f-acba-4347-a8ff-1ef8e67f0344",
   "metadata": {},
   "outputs": [],
   "source": [
    "@app.route('/get_loss_as_image.png', methods=['GET'])\n",
    "def get_loss_as_image():\n",
    "    t = np.arange(0, len(loss_history_total), 1)\n",
    "\n",
    "    fig, ax = plt.subplots()\n",
    "    ax.plot(t, loss_history_total)\n",
    "\n",
    "    ax.set(xlabel='epochs', ylabel='loss',\n",
    "           title='Reconstruction error')\n",
    "    ax.grid()\n",
    "\n",
    "    filename = \"anomalies.png\"\n",
    "    fig.savefig(filename)\n",
    "    return send_file(filename, mimetype='image/png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b61e945-3b45-4aa3-a852-43ab2e2f9be5",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "app.run(host='0.0.0.0', port=8080)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 470.538548,
   "end_time": "2021-03-22T20:37:13.369954",
   "environment_variables": {},
   "exception": null,
   "input_path": "/home/jovyan/work/examples/pipelines/pairs/component-library/transform/spark-csv-to-parquet.ipynb",
   "output_path": "/home/jovyan/work/examples/pipelines/pairs/component-library/transform/spark-csv-to-parquet.ipynb",
   "parameters": {},
   "start_time": "2021-03-22T20:29:22.831406",
   "version": "2.3.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
